package com.atguigu.cn.wc

import org.apache.flink.api.scala._
import org.apache.flink.api.scala.ExecutionEnvironment

/**
 * Batch word count over a text file, built on the Flink DataSet API.
 *
 * Reads the input file line by line, splits every line into words, counts how
 * often each word occurs and prints the resulting `(word, count)` pairs.
 *
 * @author yangshen
 * @date 2020/4/4 23:10
 */
object WordCount {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath =
    "D:\\my\\my_git\\mayun\\miaohui8023\\my-flink\\flink-tutorial\\src\\main\\resources\\hello.txt"

  /**
   * Entry point of the word count job.
   *
   * @param args optional command-line arguments; when present, `args(0)` is the
   *             path of the text file to read. When absent, `DefaultInputPath`
   *             is used.
   */
  def main(args: Array[String]): Unit = {
    // Create the batch execution environment.
    val environment = ExecutionEnvironment.getExecutionEnvironment

    // Take the input path from the command line when given, so the job is not
    // tied to a single machine's directory layout.
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val inputDataSet = environment.readTextFile(inputPath)

    val wordCountDataSet = inputDataSet
      // Tokenize on runs of whitespace rather than a single space.
      .flatMap(_.split("\\s+"))
      // A blank line or leading whitespace still yields an empty token; drop it
      // so the empty string is never counted as a word.
      .filter(_.nonEmpty)
      // Pair every word with an initial count of 1: (word, 1).
      .map((_, 1))
      // Group by tuple field 0 (the word) ...
      .groupBy(0)
      // ... and add up tuple field 1 (the count) within each group.
      .sum(1)

    // Print the (word, count) pairs.
    wordCountDataSet.print()
  }
}
